import xml.dom.minidom as xmldom
import os

# Prune a VOC-style dataset: delete annotation/image pairs whose annotation
# contains a given object label (by default up to 3000 'person' samples).
annotation_path = "BDD100kutils/busl1"
JPEGImages_path = "BDD100kutils/bus1"


def _object_labels(xml_path: str) -> list:
    """Return the first <name> text of every <object> element in *xml_path*.

    Objects with no <name> child, or with an empty one, are skipped instead
    of raising.
    """
    root = xmldom.parse(xml_path).documentElement
    found = []
    for obj in root.getElementsByTagName("object"):
        name_nodes = obj.getElementsByTagName("name")
        if not name_nodes or name_nodes[0].firstChild is None:
            continue
        found.append(name_nodes[0].firstChild.data)
    return found


def remove_samples_with_label(annotation_dir: str, image_dir: str,
                              label: str = "person", limit: int = 3000) -> int:
    """Delete annotation/image pairs whose annotation contains *label*.

    Args:
        annotation_dir: Directory holding the ``.xml`` annotation files.
        image_dir: Directory holding the matching ``<stem>.jpg`` images.
        label: Object name that marks a sample for deletion.
        limit: Maximum number of samples to delete.

    Returns:
        The number of samples removed.

    Raises:
        FileNotFoundError: If *annotation_dir* does not exist.
    """
    removed = 0
    # Sorted so that the samples hit by `limit` are reproducible between runs.
    for entry in sorted(os.listdir(annotation_dir)):
        # Stop as soon as the quota is reached rather than parsing the rest.
        if removed >= limit:
            break
        if not entry.lower().endswith(".xml"):
            continue

        xml_path = os.path.join(annotation_dir, entry)
        # splitext keeps stems that themselves contain dots ("a.b.xml" -> "a.b").
        image_path = os.path.join(image_dir, os.path.splitext(entry)[0] + ".jpg")

        if label not in _object_labels(xml_path):
            continue

        os.remove(xml_path)
        try:
            os.remove(image_path)
        except FileNotFoundError:
            # The annotation is already gone; a missing image must not abort
            # the whole run.
            print("image not found, annotation removed only: " + image_path)
        else:
            print(image_path)
        removed += 1
    return removed


if __name__ == "__main__":
    count = remove_samples_with_label(annotation_path, JPEGImages_path)



